The samples correspond to the four transects from the Spanish and
French side of the Pyrenees.
Per transect we have 4-6 sites, running from 900m to 2400 ASL and per
sampling point we have 4-5 individuals sampled.
Coassemblies had been formed per elevation, having 10 coassembly groups
per country and running from the lowest elevation to the highest.
The MAGs have been dereplicated all together.
| Country | Transect | Site | Elevation |
|---|---|---|---|
| Spain | Aisa | A | 1250m |
| B | 1450m | ||
| C | 1650m | ||
| D | 1850m | ||
| Aran | E | 1000m | |
| F | 1080m | ||
| G | 1340m | ||
| H | 1500m | ||
| I | 1650m | ||
| J | 1850m | ||
| France | Tourmalet | K | 941m |
| L | 1260m | ||
| K | 1628m | ||
| N | 1873m | ||
| Sentein | O | 953m | |
| P | 1255m | ||
| Q | 1561m | ||
| R | 1797m | ||
| S | 2100m | ||
| T | 2306m |
library(tidyverse)
library(ape)
library(devtools)
library(ggplot2)
library(ggpubr)
library(hilldiv2)
library (knitr)
library(spaa)
library(vegan)
library(lme4)
counts_file="data/DMB0113_counts.tsv"
tree_file="data/DMB0113.tree"
taxonomy_file="data/DMB0113_mag_info.tsv"
metadata_file="data/Pyrenees_metadata_all.tsv"
coverage_file="data/DMB0113_coverage.tsv"
gift_file="data/GIFTs.tsv"
count_table <- read.table(counts_file,header=T, sep="\t",row.names=1)
tree <- read.tree(tree_file)
mags_table <- read.table(taxonomy_file,sep="\t",header=T)
rownames(mags_table) <- mags_table[,1] # add row names
metadata <- read.table(metadata_file,header=T, sep=",")
coverage_table <- read.table(coverage_file,header=T, sep="\t", row.names=1)
GIFTs_table <- read.table(gift_file,header=T, sep="\t", row.names=1)
# Download and load the phylum colour table
colours_URL="https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv"
download.file(colours_URL, "data/ehi_phylum_colors.tsv")
ehi_phylum_colors <- read.table("data/ehi_phylum_colors.tsv",sep="\t",header=T,comment.char = "")
After the EHI report we saw that EHI00102 should be excluded for excessive host DNA, and EHI00182 due to excessive not known content from the Spanish Transects. EHI00435 had 0 of bins and 0% of assembly mapping so was also excluded from the French Transect.
#Counts_raw
columns_to_exclude <- c("EHI00102", "EHI00182","EHI00435") # Columns to exclude
count_table <- count_table %>%
select(-columns_to_exclude)
#Metadata
metadata <- metadata %>%
filter(EHI_number != "EHI00102") %>%
filter(EHI_number != "EHI00182") %>%
filter(EHI_number !="EHI00435")
#Coverage_table
columns_to_exclude <- c("EHI00102", "EHI00182","EHI00435") # Columns to exclude
coverage_table <- coverage_table %>%
select(-columns_to_exclude)
sequencing_depth <- colSums(count_table)
mag_details <- mags_table %>%
select(c(genome,domain,phylum,completeness,contamination,mag_size)) %>%
mutate(mag_size=round(mag_size/1000000,2)) %>% #change mag_size to MBs
rename(comp=completeness,cont=contamination,size=mag_size) %>% #rename columns
remove_rownames() %>%
arrange(match(genome, rev(tree$tip.label))) #sort MAGs according to phylogenetic tree
sequence_fractions <- count_table %>%
rownames_to_column("Genome") %>%
pivot_longer(-Genome, names_to = "sample", values_to = "value") %>%
group_by(sample) %>%
summarise(mags = sum(value)) %>%
left_join(metadata, by = join_by(sample == EHI_number)) %>%
select(sample,mags,metagenomic_bases,host_bases,bases_lost_fastp_percent) %>%
mutate(mags_bases = mags*146) %>%
mutate(lowqual_bases = ((metagenomic_bases+host_bases)/(1-bases_lost_fastp_percent))-(metagenomic_bases+host_bases)) %>%
mutate(unmapped_bases = metagenomic_bases - mags_bases) %>%
mutate(unmapped_bases = ifelse(unmapped_bases < 0, 0, unmapped_bases)) %>%
select(sample,mags_bases,unmapped_bases,host_bases,lowqual_bases)
mags_bases_mean <- sequence_fractions %>%
mutate(mags_bases = mags_bases / 1000000000) %>%
select(mags_bases) %>%
pull() %>%
mean()
min_coverage=0.3
count_table_cov <- coverage_table %>%
mutate(across(everything(), ~ ifelse(. > min_coverage, 1, 0))) %>%
map2_df(., count_table, ~ .x * .y) %>%
as.data.frame()
rownames(count_table_cov) <- rownames(coverage_table)
genome_read_sizes <- mags_table[rownames(count_table_cov),] %>%
select(mag_size) %>%
mutate(mag_size = mag_size / 150) %>%
pull()
count_table_cov_size <- sweep(count_table_cov, 1, genome_read_sizes, "/")
The count data table requires some modifications before generating the plots, including TSS normalisation, appending taxonomy and metadata information etc. The following script generates a tibble that can then be used to plot identical stacked barplots coloured at different taxonomic levels.
count_table_cov_size_pivot <- count_table_cov_size %>%
rownames_to_column("Genome") %>%
mutate_at(vars(-Genome),~./sum(.)) %>% #apply TSS normalisation
pivot_longer(-Genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., mags_table, by = join_by(Genome == genome)) %>% #append taxonomy
mutate(phylum = fct_relevel(phylum, rev(ehi_phylum_colors$phylum))) #sort phyla by taxonomy
# Retrieve taxonomy colors to use standardised EHI colors
phylum_colors <- ehi_phylum_colors %>%
filter(phylum %in% unique(count_table_cov_size_pivot$phylum)) %>%
select(colors) %>%
pull() %>%
rev()
phylum_colors <- c(phylum_colors,"#cccccc") #REMOVE! ONLY FOR ARCHAEANS
count_table_cov_size_met <- count_table_cov_size %>%
rownames_to_column("Genome") %>%
mutate_at(vars(-Genome),~./sum(.)) %>% #apply TSS normalisation
pivot_longer(-Genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., mags_table, by = join_by(Genome == genome)) %>% #append taxonomy
left_join(., metadata, by = join_by(sample == EHI_number)) %>%
mutate(phylum = fct_relevel(phylum, rev(ehi_phylum_colors$phylum))) #sort phyla by taxonomy
count_table_cov_size_met$Elevation<-as.factor(count_table_cov_size_met$Elevation)
# Create an interaction variable for elevation and sample
count_table_cov_size_met$interaction_var <- interaction(count_table_cov_size_met$sample, count_table_cov_size_met$Elevation)
# Plot stacked barplot
ggplot(count_table_cov_size_met, aes(x=interaction_var,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
labs(y = "Relative abundance", x="Elevation (m)") +
guides(fill = guide_legend(ncol = 3)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
legend.position = "top",
legend.title = element_blank(),
legend.text = element_text(size=7))+
scale_x_discrete(labels = function(x) gsub(".*\\.", "", x)) +
facet_wrap(~Transect, scales = "free", labeller = as_labeller(function(label) gsub(".*\\.", "", label))) #only show elevation label
In order to avoid issues with diversity computation is recommendable to remove samples and MAGs without count data.
#Get list of present MAGs
present_MAGs <- count_table_cov_size %>%
filter(rowSums(.[, -1]) != 0) %>%
rownames()
#Remove samples with all zeros (no data after filtering)
count_table_cov_size <- count_table_cov_size %>%
select_if(~!all(. == 0))
#Align KEGG annotations with present MAGs and remove all-zero and all-one traits
present_MAGs <- present_MAGs[present_MAGs %in% rownames(GIFTs_table)]
GIFTs_table_filt <- GIFTs_table[present_MAGs,] %>%
select_if(~!all(. == 0)) %>% #remove all-zero modules
select_if(~!all(. == 1)) #remove all-one modules
#Filter count table to only contain present MAGs after KEGG filtering
count_table_cov_size_filt <- count_table_cov_size[present_MAGs,]
q0n <- hilldiv(count_table_cov_size,q=0) %>% c()
q1n <- hilldiv(count_table_cov_size,q=1) %>% c()
q1p <- hilldiv(count_table_cov_size,q=1,tree=tree) %>% c()
dist <- traits2dist(GIFTs_table_filt, method="gower")
q1f <- hilldiv(count_table_cov_size_filt,q=1,dist=dist) %>% c()
# Merge all metrics
alpha_div <- cbind(sample=colnames(count_table_cov_size),richness=q0n,neutral=round(q1n,3),phylo=round(q1p,3),func=round(q1f,3)) %>%
as.data.frame()
columns <- c("richness","neutral","phylo","func","mapped","total")
# Add amount of sequencing data to the table
alpha_div <- alpha_div %>%
left_join(sequence_fractions, by = join_by(sample == sample)) %>% #add sequencing depth information
mutate(mapped=round(mags_bases/1000000000,3)) %>% #modify depth to million reads
mutate(total=round((mags_bases+unmapped_bases+host_bases+lowqual_bases)/1000000000,3)) %>%
select(sample,richness,neutral,phylo,func,mapped,total) %>%
mutate(across(-1, as.numeric))
alpha_div %>%
pivot_longer(-sample, names_to = "data", values_to = "value") %>%
mutate(data = factor(data, levels = columns)) %>%
ggplot(aes(x=value, y=sample)) +
geom_bar(stat='identity', fill="#6c9ebc") +
facet_wrap(~data, scales="free_x", ncol=6) +
theme_classic() +
theme(
strip.background = element_blank(),
panel.grid.minor.x = element_line( size=.1, color="grey" ),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)
)
You can also generate an HTML table using knitr.
| sample | richness | neutral | phylo | func | mapped | total |
|---|---|---|---|---|---|---|
| EHI00077 | 110 | 46.192 | 6.061 | 1.501 | 1.395 | 2.560 |
| EHI00107 | 204 | 76.436 | 6.379 | 1.522 | 3.059 | 5.209 |
| EHI00529 | 275 | 113.857 | 5.967 | 1.497 | 5.113 | 6.888 |
| EHI00080 | 228 | 105.145 | 4.749 | 1.515 | 2.688 | 7.384 |
| EHI00451 | 356 | 193.233 | 5.600 | 1.473 | 5.079 | 7.099 |
| EHI00484 | 257 | 141.082 | 5.420 | 1.487 | 5.256 | 7.216 |
| EHI00493 | 209 | 45.121 | 5.484 | 1.444 | 2.478 | 7.621 |
| EHI00512 | 122 | 61.881 | 5.274 | 1.494 | 1.343 | 10.637 |
| EHI00538 | 223 | 61.984 | 7.135 | 1.505 | 3.164 | 6.892 |
| EHI00176 | 143 | 73.430 | 3.658 | 1.439 | 2.747 | 4.271 |
| EHI00524 | 242 | 104.931 | 5.579 | 1.476 | 4.900 | 8.769 |
| EHI00527 | 336 | 142.813 | 5.368 | 1.505 | 7.611 | 10.612 |
| EHI00126 | 72 | 33.541 | 3.842 | 1.428 | 1.156 | 3.125 |
| EHI00177 | 148 | 81.559 | 4.478 | 1.468 | 2.164 | 2.958 |
| EHI00070 | 120 | 71.590 | 4.880 | 1.468 | 1.636 | 3.642 |
| EHI00133 | 10 | 6.304 | 3.620 | 1.645 | 3.375 | 4.410 |
| EHI00438 | 284 | 141.640 | 4.556 | 1.469 | 4.755 | 5.998 |
| EHI00097 | 128 | 44.096 | 4.676 | 1.411 | 2.969 | 4.663 |
| EHI00117 | 305 | 128.083 | 5.424 | 1.475 | 7.078 | 9.432 |
| EHI00525 | 182 | 55.439 | 4.519 | 1.417 | 5.053 | 6.625 |
| EHI00112 | 251 | 124.839 | 6.194 | 1.506 | 3.647 | 5.579 |
| EHI00124 | 150 | 24.911 | 3.853 | 1.464 | 3.273 | 5.395 |
| EHI00134 | 270 | 116.444 | 5.665 | 1.493 | 3.943 | 8.445 |
| EHI00111 | 199 | 82.627 | 4.517 | 1.459 | 3.577 | 5.368 |
| EHI00181 | 165 | 48.697 | 3.653 | 1.425 | 2.446 | 3.418 |
| EHI00472 | 247 | 111.902 | 5.486 | 1.496 | 3.373 | 6.129 |
| EHI00113 | 355 | 165.915 | 7.250 | 1.523 | 7.325 | 10.299 |
| EHI00480 | 246 | 112.783 | 6.460 | 1.498 | 4.355 | 9.044 |
| EHI00129 | 200 | 127.140 | 5.603 | 1.486 | 2.774 | 4.005 |
| EHI00069 | 198 | 75.741 | 4.376 | 1.436 | 4.106 | 8.222 |
| EHI00085 | 130 | 68.711 | 5.308 | 1.496 | 1.928 | 2.937 |
| EHI00103 | 85 | 41.575 | 5.409 | 1.447 | 1.340 | 2.958 |
| EHI00104 | 112 | 63.946 | 4.724 | 1.483 | 1.257 | 2.759 |
| EHI00518 | 258 | 134.649 | 3.744 | 1.451 | 5.838 | 7.271 |
| EHI00499 | 126 | 37.894 | 4.736 | 1.466 | 3.208 | 12.356 |
| EHI00464 | 328 | 171.164 | 6.195 | 1.507 | 5.053 | 8.125 |
| EHI00422 | 138 | 71.029 | 4.384 | 1.430 | 2.330 | 3.018 |
| EHI00470 | 289 | 112.183 | 5.790 | 1.498 | 8.184 | 12.393 |
| EHI00507 | 347 | 148.480 | 6.979 | 1.523 | 5.508 | 7.600 |
| EHI00462 | 160 | 63.579 | 3.838 | 1.414 | 4.849 | 7.080 |
| EHI00108 | 95 | 48.417 | 3.356 | 1.426 | 2.744 | 4.016 |
| EHI00456 | 185 | 38.378 | 4.628 | 1.431 | 4.750 | 6.055 |
| EHI00541 | 212 | 116.678 | 4.245 | 1.478 | 6.835 | 9.111 |
| EHI00441 | 223 | 49.507 | 4.360 | 1.480 | 6.831 | 9.875 |
| EHI00537 | 108 | 54.580 | 6.505 | 1.482 | 1.449 | 4.368 |
| EHI00121 | 114 | 29.138 | 3.984 | 1.419 | 1.945 | 4.307 |
| EHI00088 | 167 | 70.419 | 5.543 | 1.458 | 3.014 | 3.869 |
| EHI00125 | 208 | 30.253 | 4.647 | 1.398 | 3.162 | 5.015 |
| EHI00467 | 227 | 126.959 | 5.448 | 1.509 | 3.759 | 6.687 |
| EHI00547 | 279 | 93.954 | 5.620 | 1.462 | 6.176 | 16.900 |
| EHI00504 | 185 | 46.309 | 5.150 | 1.464 | 4.966 | 8.939 |
| EHI00178 | 204 | 89.231 | 6.236 | 1.521 | 2.604 | 4.254 |
| EHI00433 | 254 | 77.527 | 6.064 | 1.489 | 3.707 | 6.733 |
| EHI00473 | 307 | 80.589 | 4.616 | 1.403 | 4.804 | 7.014 |
| EHI00426 | 188 | 30.266 | 4.941 | 1.391 | 5.675 | 8.264 |
| EHI00130 | 134 | 73.953 | 5.332 | 1.486 | 2.115 | 3.506 |
| EHI00138 | 210 | 125.604 | 4.132 | 1.463 | 2.858 | 3.887 |
| EHI00110 | 15 | 9.317 | 2.420 | 1.613 | 2.615 | 4.130 |
| EHI00490 | 324 | 75.301 | 4.668 | 1.441 | 7.830 | 12.233 |
| EHI00569 | 310 | 124.141 | 5.534 | 1.501 | 6.796 | 10.328 |
| EHI00095 | 106 | 35.074 | 3.829 | 1.411 | 1.924 | 2.702 |
| EHI00496 | 416 | 184.827 | 6.250 | 1.512 | 6.170 | 8.743 |
| EHI00568 | 195 | 100.653 | 6.274 | 1.482 | 3.592 | 6.135 |
| EHI00089 | 66 | 8.483 | 3.331 | 1.303 | 1.958 | 2.684 |
| EHI00137 | 162 | 87.929 | 4.826 | 1.521 | 2.484 | 4.064 |
| EHI00428 | 291 | 135.956 | 5.698 | 1.488 | 3.738 | 5.516 |
| EHI00131 | 131 | 94.748 | 5.862 | 1.525 | 1.205 | 2.829 |
| EHI00139 | 163 | 62.558 | 4.467 | 1.496 | 2.143 | 3.418 |
| EHI00119 | 237 | 70.875 | 3.944 | 1.406 | 4.676 | 6.687 |
| EHI00180 | 195 | 117.704 | 4.431 | 1.487 | 2.470 | 3.822 |
| EHI00465 | 122 | 49.104 | 4.014 | 1.482 | 1.426 | 2.312 |
| EHI00092 | 30 | 3.631 | 1.669 | 1.239 | 4.918 | 6.506 |
| EHI00114 | 197 | 64.335 | 6.231 | 1.508 | 4.664 | 7.563 |
| EHI00076 | 212 | 86.984 | 7.133 | 1.518 | 3.839 | 5.673 |
| EHI00514 | 251 | 113.086 | 4.408 | 1.472 | 6.257 | 8.555 |
| EHI00073 | 189 | 135.139 | 4.091 | 1.474 | 1.845 | 2.649 |
| EHI00098 | 268 | 155.197 | 6.331 | 1.519 | 3.150 | 5.235 |
| EHI00506 | 279 | 140.754 | 5.420 | 1.498 | 8.088 | 11.265 |
| EHI00567 | 266 | 104.696 | 5.185 | 1.477 | 4.447 | 6.941 |
| EHI00483 | 314 | 173.976 | 4.687 | 1.458 | 4.740 | 7.592 |
| EHI00122 | 255 | 121.624 | 4.852 | 1.539 | 5.388 | 7.555 |
| EHI00458 | 234 | 75.277 | 5.411 | 1.463 | 4.919 | 7.535 |
| EHI00074 | 323 | 99.228 | 4.859 | 1.424 | 5.753 | 8.148 |
| EHI00566 | 347 | 159.247 | 6.158 | 1.512 | 7.378 | 9.891 |
| EHI00481 | 250 | 108.034 | 5.670 | 1.486 | 5.379 | 7.352 |
| EHI00086 | 129 | 66.084 | 4.904 | 1.560 | 1.744 | 3.586 |
| EHI00100 | 247 | 86.648 | 5.982 | 1.502 | 4.109 | 7.769 |
| EHI00116 | 151 | 70.529 | 2.736 | 1.453 | 4.727 | 6.036 |
| EHI00093 | 165 | 98.316 | 6.863 | 1.522 | 2.314 | 4.080 |
| EHI00479 | 303 | 176.977 | 5.788 | 1.507 | 12.816 | 15.723 |
| EHI00128 | 189 | 99.762 | 5.402 | 1.499 | 2.965 | 4.539 |
| EHI00105 | 183 | 95.825 | 5.586 | 1.496 | 2.731 | 3.917 |
| EHI00081 | 113 | 66.235 | 5.433 | 1.509 | 1.336 | 2.843 |
| EHI00075 | 164 | 62.109 | 5.875 | 1.485 | 2.490 | 4.164 |
| EHI00536 | 300 | 151.070 | 3.489 | 1.440 | 7.718 | 9.454 |
| EHI00115 | 202 | 111.774 | 4.067 | 1.462 | 3.009 | 6.308 |
| EHI00477 | 231 | 89.238 | 5.775 | 1.496 | 3.732 | 7.016 |
| EHI00106 | 48 | 12.723 | 4.030 | 1.434 | 2.075 | 6.112 |
| EHI00120 | 230 | 94.185 | 4.462 | 1.461 | 3.812 | 4.935 |
| EHI00091 | 161 | 77.291 | 4.265 | 1.456 | 2.492 | 3.810 |
| EHI00079 | 82 | 61.050 | 5.092 | 1.481 | 0.808 | 4.433 |
| EHI00072 | 277 | 118.923 | 5.177 | 1.470 | 4.043 | 5.444 |
| EHI00179 | 150 | 65.405 | 6.039 | 1.479 | 2.393 | 3.538 |
| EHI00101 | 111 | 63.702 | 5.236 | 1.472 | 1.994 | 8.101 |
| EHI00488 | 279 | 146.485 | 7.153 | 1.501 | 4.363 | 9.151 |
| EHI00118 | 252 | 119.896 | 4.463 | 1.490 | 3.701 | 5.786 |
Alpha diversities can be compared across any categorical features that group analysed samples (e.g., localities, sampling seasons, sex), or continuous variables associated with the host animals.
Let’s first create a nice colour palette for the localities
alpha_colors <- c("#e5bd5b","#6b7398","#76b183","#d57d2c","#2a2d26","#f9d4cc","#3c634e","#ea68c3")
Let’s also identify the number of comparing groups, so that the colour palette can be subsetted properly when plotting the figures.
group_n <- alpha_div %>% select(sample,neutral) %>%
left_join(metadata, by = join_by(sample == EHI_number)) %>%
mutate(location=paste0(round(longitude,2),"_",round(latitude,2))) %>%
select(location) %>% pull() %>% unique() %>% length()
alpha_div %>%
select(sample,neutral) %>%
pivot_longer(-sample, names_to = "data", values_to = "value") %>%
mutate(data = factor(data, levels = columns)) %>%
left_join(metadata, by = join_by(sample == EHI_number)) %>%
mutate(location=paste0(round(longitude,2),"_",round(latitude,2))) %>%
ggboxplot(., x = "location", y = "value", color = "Transect") +
scale_color_manual(values=alpha_colors[c(1:group_n)]) +
scale_fill_manual(values=paste0(alpha_colors[c(1:group_n)])) +
stat_compare_means() +
theme_classic() +
labs(y = "Neutral Hill numbers") +
theme(
legend.position = "top",
legend.box = "horizontal",
axis.title.x = element_blank(),
axis.text.x = element_blank()) +
guides(color=guide_legend(title="Location"))
alpha_div %>%
select(sample,phylo) %>%
pivot_longer(-sample, names_to = "data", values_to = "value") %>%
mutate(data = factor(data, levels = columns)) %>%
left_join(metadata, by = join_by(sample == EHI_number)) %>%
mutate(location=paste0(round(longitude,2),"_",round(latitude,2))) %>%
ggboxplot(., x = "location", y = "value", color = "Transect") +
scale_color_manual(values=alpha_colors[c(1:group_n)]) +
scale_fill_manual(values=paste0(alpha_colors[c(1:group_n)])) +
stat_compare_means() +
theme_classic() +
labs(y = "Phylogenetic Hill numbers") +
theme(
legend.position = "top",
legend.box = "horizontal",
axis.title.x = element_blank(),
axis.text.x = element_blank()) +
guides(color=guide_legend(title="Location"))
alpha_div %>%
select(sample,func) %>%
pivot_longer(-sample, names_to = "data", values_to = "value") %>%
mutate(data = factor(data, levels = columns)) %>%
left_join(metadata, by = join_by(sample == EHI_number)) %>%
mutate(location=paste0(round(longitude,2),"_",round(latitude,2))) %>%
ggboxplot(., x = "location", y = "value", color = "Transect") +
scale_color_manual(values=alpha_colors[c(1:group_n)]) +
scale_fill_manual(values=paste0(alpha_colors[c(1:group_n)])) +
stat_compare_means() +
theme_classic() +
labs(y = "Functional Hill numbers") +
theme(
legend.position = "top",
legend.box = "horizontal",
axis.title.x = element_blank(),
axis.text.x = element_blank()) +
guides(color=guide_legend(title="Location"))
alpha_div_neutral_met<-alpha_div %>%
select(sample,neutral) %>%
pivot_longer(-sample, names_to = "data", values_to = "value") %>%
mutate(data = factor(data, levels = columns)) %>%
left_join(metadata, by = join_by(sample == EHI_number))
ggplot(alpha_div_neutral_met, aes(x = Elevation, y = value)) +
geom_point() +
geom_smooth(method = lm) +
facet_wrap(~ factor(Transect))+
labs(x = "Alpha div. neutral")
alpha_div_phylo_met<-alpha_div %>%
select(sample,phylo) %>%
pivot_longer(-sample, names_to = "data", values_to = "value") %>%
mutate(data = factor(data, levels = columns)) %>%
left_join(metadata, by = join_by(sample == EHI_number))
ggplot(alpha_div_phylo_met, aes(x = Elevation, y = value)) +
geom_point() +
geom_smooth(method = lm) +
facet_wrap(~ factor(Transect))+
labs(x = "Alpha div. phylogenetic")
alpha_div_func_met<-alpha_div %>%
select(sample,func) %>%
pivot_longer(-sample, names_to = "data", values_to = "value") %>%
mutate(data = factor(data, levels = columns)) %>%
left_join(metadata, by = join_by(sample == EHI_number))
ggplot(alpha_div_func_met, aes(x = Elevation, y = value)) +
geom_point() +
geom_smooth(method = lm) +
facet_wrap(~ factor(Transect))+
labs(x = "Alpha div. functional")
#Create mixed model formula
formula <- formula(value ~ log(sequencing_depth)+Elevation*Transect+(1|Sampling_point))
#Fit the mixed model
model <- lmer(formula, data = alpha_div_neutral_met)
#Print the model summary
summary(model)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## value ~ log(sequencing_depth) + Elevation * Transect + (1 | Sampling_point)
## Data: alpha_div_neutral_met
##
## REML criterion at convergence: 1047.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.24390 -0.64170 0.02559 0.73848 2.09339
##
## Random effects:
## Groups Name Variance Std.Dev.
## Sampling_point (Intercept) 244.6 15.64
## Residual 1202.5 34.68
## Number of obs: 106, groups: Sampling_point, 20
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -512.50505 158.19789 -3.240
## log(sequencing_depth) 38.59935 8.20775 4.703
## Elevation -0.03567 0.04672 -0.763
## TransectAran -62.67784 83.52588 -0.750
## TransectSentein -88.55744 80.19128 -1.104
## TransectTourmalet -65.10407 86.01919 -0.757
## Elevation:TransectAran 0.03950 0.05470 0.722
## Elevation:TransectSentein 0.05493 0.05052 1.087
## Elevation:TransectTourmalet 0.05451 0.05614 0.971
##
## Correlation of Fixed Effects:
## (Intr) lg(s_) Elevtn TrnscA TrnscS TrnscT Elv:TA Elv:TS
## lg(sqncng_) -0.888
## Elevation -0.476 0.024
## TransectArn -0.455 0.062 0.862
## TransctSntn -0.367 -0.056 0.895 0.786
## TrnsctTrmlt -0.373 -0.018 0.836 0.735 0.768
## Elvtn:TrnsA 0.419 -0.035 -0.855 -0.985 -0.764 -0.714
## Elvtn:TrnsS 0.416 0.005 -0.924 -0.796 -0.983 -0.773 0.789
## Elvtn:TrnsT 0.381 -0.003 -0.832 -0.717 -0.746 -0.983 0.711 0.769
## fit warnings:
## Some predictor variables are on very different scales: consider rescaling
MuMIn::r.squaredGLMM(model)
## R2m R2c
## [1,] 0.276091 0.3984537
#Create mixed model formula
formula <- formula(value ~ log(sequencing_depth)+Elevation*Transect+(1|Sampling_point))
#Fit the mixed model
model <- lmer(formula, data = alpha_div_phylo_met)
#Print the model summary
summary(model)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## value ~ log(sequencing_depth) + Elevation * Transect + (1 | Sampling_point)
## Data: alpha_div_phylo_met
##
## REML criterion at convergence: 349.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.6295 -0.5590 0.0322 0.5191 2.4500
##
## Random effects:
## Groups Name Variance Std.Dev.
## Sampling_point (Intercept) 0.1054 0.3246
## Residual 0.9367 0.9679
## Number of obs: 106, groups: Sampling_point, 20
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.514e+00 4.256e+00 1.296
## log(sequencing_depth) 4.650e-02 2.266e-01 0.205
## Elevation -9.068e-04 1.129e-03 -0.803
## TransectAran -1.544e-01 2.016e+00 -0.077
## TransectSentein -2.744e+00 1.943e+00 -1.413
## TransectTourmalet -1.228e+00 2.086e+00 -0.589
## Elevation:TransectAran -2.736e-05 1.321e-03 -0.021
## Elevation:TransectSentein 1.928e-03 1.224e-03 1.575
## Elevation:TransectTourmalet 1.124e-03 1.363e-03 0.824
##
## Correlation of Fixed Effects:
## (Intr) lg(s_) Elevtn TrnscA TrnscS TrnscT Elv:TA Elv:TS
## lg(sqncng_) -0.911
## Elevation -0.432 0.027
## TransectArn -0.423 0.071 0.862
## TransctSntn -0.315 -0.062 0.891 0.781
## TrnsctTrmlt -0.327 -0.021 0.830 0.729 0.760
## Elvtn:TrnsA 0.384 -0.040 -0.855 -0.985 -0.760 -0.709
## Elvtn:TrnsS 0.372 0.004 -0.921 -0.792 -0.982 -0.766 0.787
## Elvtn:TrnsT 0.340 -0.004 -0.827 -0.712 -0.738 -0.983 0.707 0.763
## fit warnings:
## Some predictor variables are on very different scales: consider rescaling
MuMIn::r.squaredGLMM(model)
## R2m R2c
## [1,] 0.1332427 0.2208792
#Create mixed model formula
formula <- formula(value ~ log(sequencing_depth)+Elevation*Transect+(1|Sampling_point))
#Fit the mixed model
model <- lmer(formula, data = alpha_div_func_met)
#Print the model summary
summary(model)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## value ~ log(sequencing_depth) + Elevation * Transect + (1 | Sampling_point)
## Data: alpha_div_func_met
##
## REML criterion at convergence: -230.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.6323 -0.3516 0.0950 0.5230 2.9850
##
## Random effects:
## Groups Name Variance Std.Dev.
## Sampling_point (Intercept) 0.000000 0.00000
## Residual 0.002507 0.05007
## Number of obs: 106, groups: Sampling_point, 20
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.417e+00 2.074e-01 6.832
## log(sequencing_depth) 3.346e-03 1.143e-02 0.293
## Elevation -3.409e-06 4.464e-05 -0.076
## TransectAran 6.154e-02 7.955e-02 0.774
## TransectSentein -3.855e-02 7.743e-02 -0.498
## TransectTourmalet -1.068e-02 8.347e-02 -0.128
## Elevation:TransectAran -3.642e-05 5.229e-05 -0.697
## Elevation:TransectSentein 2.425e-05 4.884e-05 0.496
## Elevation:TransectTourmalet 1.471e-05 5.476e-05 0.269
##
## Correlation of Fixed Effects:
## (Intr) lg(s_) Elevtn TrnscA TrnscS TrnscT Elv:TA Elv:TS
## lg(sqncng_) -0.943
## Elevation -0.361 0.035
## TransectArn -0.375 0.092 0.860
## TransctSntn -0.226 -0.075 0.878 0.766
## TrnsctTrmlt -0.250 -0.027 0.815 0.714 0.738
## Elvtn:TrnsA 0.330 -0.052 -0.855 -0.984 -0.748 -0.696
## Elvtn:TrnsS 0.299 0.002 -0.913 -0.783 -0.980 -0.746 0.779
## Elvtn:TrnsT 0.272 -0.004 -0.814 -0.699 -0.717 -0.982 0.696 0.744
## fit warnings:
## Some predictor variables are on very different scales: consider rescaling
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
MuMIn::r.squaredGLMM(model)
## R2m R2c
## [1,] 0.03886852 0.03886852
beta_colors <- c("#e5bd5b","#6b7398","#76b183","#d57d2c","#2a2d26","#f9d4cc","#3c634e","#ea68c3")
beta_q1n <- hillpair(count_table_cov_size,q=1)
beta_q1p <- hillpair(data=count_table_cov_size,q=1, tree=tree)
beta_q1f <- hillpair(count_table_cov_size_filt,q=1, dist=dist)
beta_q1n_nmds <- beta_q1n$S %>%
metaMDS(.,trymax = 500, k=2, verbosity=FALSE) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
left_join(metadata, by = join_by(sample == EHI_number))
group_n <- length(unique(beta_q1n_nmds$Transect))
beta_q1n_nmds$Elevation<-as.character(beta_q1n_nmds$Elevation)
beta_q1n_nmds %>%
group_by(region) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(., aes(x=NMDS1,y=NMDS2, color=Elevation, shape=Transect)) +
#scale_color_manual(values=beta_colors[c(1:group_n)]) +
geom_point(size=2) +
geom_segment(aes(x=x_cen, y=y_cen, xend=NMDS1, yend=NMDS2), alpha=0.2) +
theme_classic() +
theme(legend.position="right", legend.box="vertical") +
guides(color=guide_legend(title="Transect"))
beta_q1p_nmds <- beta_q1p$S %>%
metaMDS(.,trymax = 500, k=2, verbosity=FALSE) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
left_join(metadata, by = join_by(sample == EHI_number))
group_n <- length(unique(beta_q1p_nmds$Transect))
beta_q1p_nmds$Elevation<-as.character(beta_q1p_nmds$Elevation)
beta_q1p_nmds %>%
group_by(region) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(., aes(x=NMDS1,y=NMDS2, color=Elevation, shape=Transect)) +
#scale_color_manual(values=beta_colors[c(1:group_n)]) +
geom_point(size=2) +
geom_segment(aes(x=x_cen, y=y_cen, xend=NMDS1, yend=NMDS2), alpha=0.2) +
theme_classic() +
theme(legend.position="right", legend.box="vertical") +
guides(color=guide_legend(title="Transect"))
beta_q1f_nmds <- beta_q1f$S %>%
metaMDS(.,trymax = 500, k=2, verbosity=FALSE) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
left_join(metadata, by = join_by(sample == EHI_number))
group_n <- length(unique(beta_q1f_nmds$Transect))
beta_q1f_nmds$Elevation<-as.character(beta_q1f_nmds$Elevation)
beta_q1f_nmds %>%
group_by(region) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(., aes(x=NMDS1,y=NMDS2, color=Elevation, shape=Transect)) +
#scale_color_manual(values=beta_colors[c(1:group_n)]) +
geom_point(size=2) +
geom_segment(aes(x=x_cen, y=y_cen, xend=NMDS1, yend=NMDS2), alpha=0.2) +
theme_classic() +
theme(legend.position="right", legend.box="vertical") +
guides(color=guide_legend(title="Transect"))
sample_table_adonis <- metadata %>%
filter(EHI_number %in% labels(beta_q1n$S)) %>%
arrange(EHI_number) %>%
mutate(location=paste0(round(longitude,2),"_",round(latitude,2))) %>%
select(EHI_number,Transect,Sampling_point,Elevation, sample_code) %>%
select_if(~ length(unique(.)) > 1) %>% #remove columns with all-identical values
column_to_rownames(var = "EHI_number") %>%
as.data.frame()
adonis2(formula=beta_q1n$S ~ Elevation+Transect+Sampling_point, data=sample_table_adonis[labels(beta_q1n$S),], permutations=999) %>%
as.matrix() %>%
kable()
| Df | SumOfSqs | R2 | F | Pr(>F) | |
|---|---|---|---|---|---|
| Elevation | 1 | 0.795090 | 0.0253402 | 3.175817 | 0.001 |
| Transect | 3 | 2.304856 | 0.0734578 | 3.068752 | 0.001 |
| Sampling_point | 16 | 6.996253 | 0.2229768 | 1.746565 | 0.001 |
| Residual | 85 | 21.280394 | 0.6782251 | NA | NA |
| Total | 105 | 31.376593 | 1.0000000 | NA | NA |
sample_table_adonis_phylo <- metadata %>%
filter(EHI_number %in% labels(beta_q1p$S)) %>%
arrange(EHI_number) %>%
mutate(location=paste0(round(longitude,2),"_",round(latitude,2))) %>%
select(EHI_number,Transect,Sampling_point,Elevation, sample_code) %>%
select_if(~ length(unique(.)) > 1) %>% #remove columns with all-identical values
column_to_rownames(var = "EHI_number") %>%
as.data.frame()
adonis2(formula=beta_q1p$S ~ Elevation+Transect+Sampling_point, data=sample_table_adonis_phylo[labels(beta_q1p$S),], permutations=999) %>%
as.matrix() %>%
kable()
| Df | SumOfSqs | R2 | F | Pr(>F) | |
|---|---|---|---|---|---|
| Elevation | 1 | 0.1237462 | 0.0261091 | 3.077822 | 0.010 |
| Transect | 3 | 0.2692885 | 0.0568170 | 2.232585 | 0.010 |
| Sampling_point | 16 | 0.9290533 | 0.1960202 | 1.444216 | 0.023 |
| Residual | 85 | 3.4174912 | 0.7210537 | NA | NA |
| Total | 105 | 4.7395792 | 1.0000000 | NA | NA |
sample_table_adonis_func <- metadata %>%
filter(EHI_number %in% labels(beta_q1f$S)) %>%
arrange(EHI_number) %>%
mutate(location=paste0(round(longitude,2),"_",round(latitude,2))) %>%
select(EHI_number,Transect,Sampling_point,Elevation, sample_code) %>%
select_if(~ length(unique(.)) > 1) %>% #remove columns with all-identical values
column_to_rownames(var = "EHI_number") %>%
as.data.frame()
adonis2(formula=beta_q1f$S ~ Elevation+Transect+Sampling_point, data=sample_table_adonis_func[labels(beta_q1f$S),], permutations=999) %>%
as.matrix() %>%
kable()
| Df | SumOfSqs | R2 | F | Pr(>F) | |
|---|---|---|---|---|---|
| Elevation | 1 | 0.0155891 | 0.0075003 | 1.2362357 | 0.304 |
| Transect | 3 | 0.8243893 | 0.3966328 | 21.7917515 | 0.001 |
| Sampling_point | 16 | 0.1666320 | 0.0801705 | 0.8258846 | 0.588 |
| Residual | 85 | 1.0718595 | 0.5156965 | NA | NA |
| Total | 105 | 2.0784699 | 1.0000000 | NA | NA |